#!/bin/bash
#
# Fetches dev-v1.1.json into ./data and runs bert_preprocess_data.py on it.
# Run from the directory that contains Bert_Base_Uncased_for_Pytorch/.

# Abort on any command failure, on use of an unset variable, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# Downloaded and generated files live under ./data, resolved against the
# directory the script is invoked from.
data_dir="$(pwd)/data"
# -p makes this a no-op when the directory already exists; the quoting keeps
# the path in one piece when the working directory contains spaces.
mkdir -p -- "${data_dir}"

# Fetch dev-v1.1.json unless a non-empty copy is already present in data_dir.
echo -e "\ndownloading the test dataset..."
orig_data="${data_dir}/dev-v1.1.json"
if [ -f "${orig_data}" ] && [ -s "${orig_data}" ]; then
	echo "use cache: ${orig_data}"
else
	# wget -O creates/truncates its output file before any data arrives, so
	# a failed transfer would leave a broken file that a later run mistakes
	# for a valid cache. Download to a side file and rename only on success.
	partial_data="${orig_data}.part"
	download_status=0
	# NOTE(review): --no-check-certificate disables TLS certificate
	# verification; kept to preserve existing behavior, but consider
	# removing it where the host's CA bundle is usable.
	wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json \
		 -O "${partial_data}" --no-check-certificate || download_status=$?
	if [ "${download_status}" -ne 0 ]; then
		rm -f -- "${partial_data}"
		echo "download failed (wget exit status ${download_status})" >&2
		exit "${download_status}"
	fi
	mv -f -- "${partial_data}" "${orig_data}"
	echo "download successful, test data saved in ${orig_data}"
fi

# Run bert_preprocess_data.py on the downloaded dataset; bin_dir is passed as
# its --save_dir.
echo -e "\ndata preprocessing..."
bin_dir="${data_dir}/bert_bin"
# The subshell confines the directory change, so the caller's working
# directory is unchanged no matter how the step ends. The vocab path is
# relative to Bert_Base_Uncased_for_Pytorch, hence the cd.
(
	cd ./Bert_Base_Uncased_for_Pytorch || exit 1
	# Paths are quoted so a data directory containing spaces is passed to
	# python3 as a single argument.
	python3 bert_preprocess_data.py \
		--max_seq_length 384 \
		--do_lower_case \
		--vocab_file ./DeepLearningExamples/PyTorch/LanguageModeling/BERT/vocab/vocab \
		--predict_file "${orig_data}" \
		--save_dir "${bin_dir}"
)
echo "done. bin files saved in ${bin_dir}"
